# *********************************************************************** #
# ** Statistical Learning ** #
# ** Lab 11 - Basic Classifiers 1/2 ** #
# *********************************************************************** #
# Daily-sports ------------------------------------------------------------
# Consider the problem of distinguishing human activities performed while
# wearing inertial and magnetic sensor units.
# The dataset is publicly available at the UCI Machine Learning Repository
# where it is used to classify 19 activities performed by 8 people that
# wear sensor units on the chest, arms and legs.
# The 5-min signals were acquired at 25 Hz sampling frequency and divided
# into 5-sec segments so that 480(=60x8) signal segments are obtained for
# each activity.
#
# url: https://archive.ics.uci.edu/ml/datasets/Daily+and+Sports+Activities
#
# For this Lab, we will take the results on just 4 activities (walking,
# stepper, cross trainer, jumping) performed by a single person (#1) and as
# covariates the measurements taken by all the 9 sensors (x,y,z
# accelerometers, x,y,z gyroscopes, x,y,z magnetometers) on each of the 5
# units on torso (T), right arm (RA), left arm (LA), right leg (RL),
# left leg (LL) for a total of 45 features.
load("daily-sport.RData")
str(dailysport)
## 'data.frame': 30000 obs. of 46 variables:
## $ id : Factor w/ 4 levels "crosstr","jumping",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ T-xAcc : num 8.72 8.62 8.93 9.07 9.05 ...
## $ T-yAcc : num 2.14 2.05 2.05 2.08 1.98 ...
## $ T-zAcc : num 3.83 3.45 3.28 3.23 3.41 ...
## $ T-xGyro : num 0.294 0.23 0.209 0.197 0.238 ...
## $ T-yGyro : num -0.252 -0.26 -0.344 -0.241 -0.098 ...
## $ T-zGyro : num -0.1506 -0.13245 -0.10109 -0.06141 -0.00907 ...
## $ T-xMag : num -0.577 -0.586 -0.596 -0.605 -0.61 ...
## $ T-yMag : num 0.1086 0.0974 0.0896 0.0825 0.075 ...
## $ T-zMag : num -0.697 -0.69 -0.684 -0.675 -0.672 ...
## $ RA-xAcc : num 8.59 8.5 9.09 9.36 9.34 ...
## $ RA-yAcc : num 3.51 3.77 3.8 3.77 3.94 ...
## $ RA-zAcc : num 1.58 1.64 1.62 1.68 1.83 ...
## $ RA-xGyro: num 0.492 0.252 0.168 0.25 0.269 ...
## $ RA-yGyro: num -0.224 -0.381 -0.421 -0.352 -0.276 ...
## $ RA-zGyro: num 0.532 0.65 0.706 0.597 0.373 ...
## $ RA-xMag : num -0.532 -0.55 -0.574 -0.594 -0.614 ...
## $ RA-yMag : num -0.476 -0.472 -0.461 -0.452 -0.443 ...
## $ RA-zMag : num -0.594 -0.582 -0.571 -0.559 -0.545 ...
## $ LA-xAcc : num 9.22 8.47 8.49 8.43 8.08 ...
## $ LA-yAcc : num -2.77 -2.76 -3.02 -3.07 -3.59 ...
## $ LA-zAcc : num 3.06 2.72 2.51 2.54 3.06 ...
## $ LA-xGyro: num 0.301 0.0294 -0.0944 -0.6035 -1.0688 ...
## $ LA-yGyro: num -0.0141 -0.0514 -0.0233 0.1012 0.1644 ...
## $ LA-zGyro: num -0.488 -0.379 -0.342 -0.325 -0.265 ...
## $ LA-xMag : num -0.484 -0.492 -0.503 -0.511 -0.52 ...
## $ LA-yMag : num 0.727 0.717 0.712 0.709 0.711 ...
## $ LA-zMag : num -0.256 -0.262 -0.261 -0.251 -0.229 ...
## $ RL-xAcc : num -11.65 -10.9 -10.09 -8.85 -8.46 ...
## $ RL-yAcc : num 0.112 -1.163 -1.985 -0.721 0.802 ...
## $ RL-zAcc : num 0.464 1.118 1.285 1.398 1.577 ...
## $ RL-xGyro: num -1.69 -1.268 -0.476 -0.305 -0.278 ...
## $ RL-yGyro: num 0.411 0.437 0.148 0.195 0.235 ...
## $ RL-zGyro: num 1.55 1.68 1.57 1.55 1.36 ...
## $ RL-xMag : num 0.794 0.763 0.727 0.689 0.646 ...
## $ RL-yMag : num -0.427 -0.485 -0.54 -0.587 -0.636 ...
## $ RL-zMag : num 0.192 0.177 0.171 0.166 0.164 ...
## $ LL-xAcc : num -9.74 -9.53 -9.97 -9.66 -9.77 ...
## $ LL-yAcc : num -0.434 -0.495 1.045 -0.436 -0.523 ...
## $ LL-zAcc : num -1.46 -1.55 -1.27 -1.04 -1.66 ...
## $ LL-xGyro: num -0.291 -0.348 -0.417 -0.234 -0.344 ...
## $ LL-yGyro: num 0.0618 0.0619 0.0789 -0.0764 0.0258 ...
## $ LL-zGyro: num 0.32 0.291 0.297 0.478 0.407 ...
## $ LL-xMag : num 0.8 0.805 0.811 0.817 0.823 ...
## $ LL-yMag : num 0.437 0.43 0.423 0.412 0.397 ...
## $ LL-zMag : num -0.166 -0.159 -0.149 -0.144 -0.143 ...
summary(dailysport)
## id T-xAcc T-yAcc T-zAcc
## crosstr:7500 Min. :-11.526 Min. :-14.0190 Min. :-8.363
## jumping:7500 1st Qu.: 6.584 1st Qu.: -0.3961 1st Qu.: 1.420
## stepper:7500 Median : 8.751 Median : 0.3336 Median : 2.848
## walking:7500 Mean : 9.227 Mean : 0.3792 Mean : 3.029
## 3rd Qu.: 10.936 3rd Qu.: 1.3094 3rd Qu.: 3.972
## Max. : 70.835 Max. : 14.0120 Max. :33.093
## T-xGyro T-yGyro T-zGyro
## Min. :-4.696900 Min. :-9.85770 Min. :-2.1615000
## 1st Qu.:-0.330675 1st Qu.:-0.26165 1st Qu.:-0.1770200
## Median : 0.016853 Median : 0.02464 Median :-0.0009475
## Mean : 0.001798 Mean : 0.01580 Mean : 0.0014212
## 3rd Qu.: 0.359052 3rd Qu.: 0.33622 3rd Qu.: 0.1753525
## Max. : 4.530300 Max. : 5.59380 Max. : 1.9199000
## T-xMag T-yMag T-zMag RA-xAcc
## Min. :-0.9246 Min. :-0.58331 Min. :-0.8606 Min. :-25.2760
## 1st Qu.:-0.7513 1st Qu.:-0.15989 1st Qu.:-0.6281 1st Qu.: -0.1473
## Median :-0.6952 Median :-0.06689 Median :-0.4968 Median : 2.7679
## Mean :-0.7000 Mean : 0.02185 Mean :-0.4154 Mean : 3.1426
## 3rd Qu.:-0.6171 3rd Qu.: 0.25817 3rd Qu.:-0.2799 3rd Qu.: 7.7249
## Max. :-0.4142 Max. : 0.56106 Max. : 0.2717 Max. : 21.5890
## RA-yAcc RA-zAcc RA-xGyro
## Min. :-10.587 Min. :-16.557 Min. :-7.75480
## 1st Qu.: 2.839 1st Qu.: 1.847 1st Qu.:-0.43318
## Median : 4.864 Median : 3.967 Median : 0.01838
## Mean : 6.086 Mean : 4.166 Mean : 0.01600
## 3rd Qu.: 7.529 3rd Qu.: 6.468 3rd Qu.: 0.47524
## Max. : 51.797 Max. : 39.010 Max. : 9.55990
## RA-yGyro RA-zGyro RA-xMag
## Min. :-6.432800 Min. :-4.343000 Min. :-0.94995
## 1st Qu.:-0.365402 1st Qu.:-0.654562 1st Qu.:-0.46215
## Median :-0.014138 Median :-0.017570 Median :-0.22478
## Mean :-0.009334 Mean : 0.002844 Mean :-0.25362
## 3rd Qu.: 0.362628 3rd Qu.: 0.635118 3rd Qu.:-0.01952
## Max. : 4.853700 Max. : 5.177800 Max. : 0.93707
## RA-yMag RA-zMag LA-xAcc LA-yAcc
## Min. :-1.1059 Min. :-1.1374 Min. :-31.6400 Min. :-57.939
## 1st Qu.:-0.7037 1st Qu.:-0.6716 1st Qu.: 0.6757 1st Qu.: -6.833
## Median :-0.5470 Median :-0.5559 Median : 4.3583 Median : -4.362
## Mean :-0.4846 Mean :-0.5029 Mean : 4.0236 Mean : -5.387
## 3rd Qu.:-0.2958 3rd Qu.:-0.3964 3rd Qu.: 8.1350 3rd Qu.: -2.387
## Max. : 0.3295 Max. : 0.3614 Max. : 50.9240 Max. : 30.246
## LA-zAcc LA-xGyro LA-yGyro
## Min. :-23.7440 Min. :-8.833200 Min. :-8.381800
## 1st Qu.: 0.7575 1st Qu.:-0.469542 1st Qu.:-0.382228
## Median : 3.2938 Median :-0.004280 Median :-0.024567
## Mean : 2.9418 Mean :-0.007128 Mean :-0.004787
## 3rd Qu.: 6.2104 3rd Qu.: 0.426720 3rd Qu.: 0.368470
## Max. : 24.6540 Max. :18.809000 Max. : 4.358900
## LA-zGyro LA-xMag LA-yMag
## Min. :-12.935000 Min. :-0.93608 Min. :-0.9267
## 1st Qu.: -0.624395 1st Qu.:-0.49630 1st Qu.: 0.3048
## Median : 0.027278 Median :-0.03208 Median : 0.4356
## Mean : -0.008746 Mean :-0.14015 Mean : 0.3854
## 3rd Qu.: 0.622652 3rd Qu.: 0.21392 3rd Qu.: 0.5705
## Max. : 5.679100 Max. : 0.89720 Max. : 0.8499
## LA-zMag RL-xAcc RL-yAcc RL-zAcc
## Min. :-1.1028 Min. :-89.704 Min. :-52.727 Min. :-47.9250
## 1st Qu.:-0.6849 1st Qu.:-11.917 1st Qu.: -1.034 1st Qu.: -1.1442
## Median :-0.4257 Median : -9.704 Median : 1.115 Median : -0.1701
## Mean :-0.2640 Mean :-10.051 Mean : 1.084 Mean : -0.2483
## 3rd Qu.: 0.1127 3rd Qu.: -7.624 3rd Qu.: 3.362 3rd Qu.: 0.8341
## Max. : 0.7589 Max. : 3.008 Max. : 80.427 Max. : 17.0360
## RL-xGyro RL-yGyro RL-zGyro
## Min. :-5.14820 Min. :-2.081100 Min. :-3.185800
## 1st Qu.:-0.52341 1st Qu.:-0.198860 1st Qu.:-1.127500
## Median : 0.04027 Median : 0.008418 Median :-0.064018
## Mean : 0.01691 Mean : 0.012355 Mean :-0.003762
## 3rd Qu.: 0.54979 3rd Qu.: 0.219955 3rd Qu.: 1.074200
## Max. : 6.01390 Max. : 3.311100 Max. : 3.665900
## RL-xMag RL-yMag RL-zMag LL-xAcc
## Min. :0.3556 Min. :-0.7856 Min. :-0.62804 Min. :-93.940
## 1st Qu.:0.4882 1st Qu.:-0.2595 1st Qu.:-0.32134 1st Qu.:-11.757
## Median :0.6394 Median :-0.1207 Median :-0.12695 Median : -9.556
## Mean :0.6510 Mean :-0.1197 Mean :-0.01181 Mean : -9.989
## 3rd Qu.:0.8168 3rd Qu.:-0.0391 3rd Qu.: 0.31716 3rd Qu.: -7.699
## Max. :1.0715 Max. : 0.7485 Max. : 0.57962 Max. : 3.592
## LL-yAcc LL-zAcc LL-xGyro
## Min. :-95.8980 Min. :-27.1510 Min. :-7.15920
## 1st Qu.: -4.1598 1st Qu.: -1.4636 1st Qu.:-0.56632
## Median : -1.7903 Median : -0.3309 Median :-0.06929
## Mean : -1.7952 Mean : -0.4361 Mean :-0.03791
## 3rd Qu.: 0.4201 3rd Qu.: 0.7115 3rd Qu.: 0.48713
## Max. : 49.4040 Max. : 33.0620 Max. :10.38100
## LL-yGyro LL-zGyro LL-xMag
## Min. :-3.605000 Min. :-4.115000 Min. :0.2951
## 1st Qu.:-0.227410 1st Qu.:-1.116200 1st Qu.:0.4771
## Median : 0.004304 Median : 0.018875 Median :0.6287
## Mean : 0.004683 Mean :-0.006432 Mean :0.6244
## 3rd Qu.: 0.231900 3rd Qu.: 1.148025 3rd Qu.:0.7691
## Max. : 4.207600 Max. : 3.526500 Max. :1.0377
## LL-yMag LL-zMag
## Min. :-0.7177 Min. :-0.52303
## 1st Qu.: 0.1047 1st Qu.:-0.27879
## Median : 0.2789 Median : 0.01363
## Mean : 0.2626 Mean :-0.02575
## 3rd Qu.: 0.4749 3rd Qu.: 0.14591
## Max. : 1.0249 Max. : 0.62156
# 30000 obs. of 46 variables...can apply any method we like!
# Train-Test split --------------------------------------------------------
# Pretty useful meta-package
# url: http://topepo.github.io/caret/index.html
# install.packages("caret")
suppressMessages(require(caret, quietly = T))
library(help = caret)
?createDataPartition # to get balanced train-test split
# Train-Test split
set.seed(13413) # set seed for reproducibility
idx.tr = createDataPartition(y = dailysport$id, p = .65, list = FALSE)
head(idx.tr)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 6
## [6,] 7
daily.tr = dailysport[ idx.tr, ]
daily.te = dailysport[-idx.tr, ]
# Take a look (nicely balanced)
table(dailysport$id)
##
## crosstr jumping stepper walking
## 7500 7500 7500 7500
table(daily.tr$id)
##
## crosstr jumping stepper walking
## 4875 4875 4875 4875
table(daily.te$id)
##
## crosstr jumping stepper walking
## 2625 2625 2625 2625
# To avoid problems with the non-standard variable names, let's fix them
names(daily.tr) <- make.names(names(daily.tr))
names(daily.te) <- make.names(names(daily.te))
# LDA ---------------------------------------------------------------------
library(MASS)
?lda
# Fit the LDA model
mod1 = lda(id ~ ., data = daily.tr)
class(mod1)
## [1] "lda"
names(mod1)
## [1] "prior" "counts" "means" "scaling" "lev" "svd" "N"
## [8] "call" "terms" "xlevels"
# Prediction error on test
pred1 = predict(mod1, daily.te[,-1])
names(pred1)
## [1] "class" "posterior" "x"
# Take a look
head(pred1$class) # predicted class
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
head(pred1$posterior) # per class posterior prob
## crosstr jumping stepper walking
## 5 4.974649e-46 5.493334e-77 6.921924e-66 1
## 8 3.102070e-41 8.662532e-75 1.464732e-66 1
## 15 1.629424e-33 8.300108e-90 1.386280e-50 1
## 16 1.853057e-34 2.834890e-90 1.016351e-51 1
## 17 1.902214e-36 8.638467e-95 8.031013e-50 1
## 24 1.085715e-38 1.166963e-92 8.013088e-48 1
# Missclassification error on test...quite an easy problem!
mean(pred1$class != daily.te$id)*100
## [1] 0
# Or, quick and dirty confusion matrix
table(pred1$class, daily.te$id) #...WOW...*this* never happens!
##
## crosstr jumping stepper walking
## crosstr 2625 0 0 0
## jumping 0 2625 0 0
## stepper 0 0 2625 0
## walking 0 0 0 2625
# Naive Bayes (parametric) ------------------------------------------------
# install.packages("e1071")
suppressMessages(require(e1071, quietly = T))
library(help = e1071)
?naiveBayes # in practice, this is a diagonal LDA!
# Fit the model
mod2 = naiveBayes(id ~ ., data = daily.tr, type = "raw")
class(mod2)
## [1] "naiveBayes"
names(mod2)
## [1] "apriori" "tables" "levels" "isnumeric" "call"
summary(mod2)
## Length Class Mode
## apriori 4 table numeric
## tables 45 -none- list
## levels 4 -none- character
## isnumeric 45 -none- logical
## call 5 -none- call
# Predict on the test set
pred2 = predict(mod2, daily.te[,-1]) # strangely takes a long time
head(pred2)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# Missclassification error on test
mean(pred2 != daily.te$id)*100
## [1] 0
# Once again...
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
##
## Package Library
## caret /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## ModelMetrics /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##
##
## Using the first match ...
confusionMatrix(pred1$class, daily.te$id)
## Confusion Matrix and Statistics
##
## Reference
## Prediction crosstr jumping stepper walking
## crosstr 2625 0 0 0
## jumping 0 2625 0 0
## stepper 0 0 2625 0
## walking 0 0 0 2625
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9996, 1)
## No Information Rate : 0.25
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: crosstr Class: jumping Class: stepper
## Sensitivity 1.00 1.00 1.00
## Specificity 1.00 1.00 1.00
## Pos Pred Value 1.00 1.00 1.00
## Neg Pred Value 1.00 1.00 1.00
## Prevalence 0.25 0.25 0.25
## Detection Rate 0.25 0.25 0.25
## Detection Prevalence 0.25 0.25 0.25
## Balanced Accuracy 1.00 1.00 1.00
## Class: walking
## Sensitivity 1.00
## Specificity 1.00
## Pos Pred Value 1.00
## Neg Pred Value 1.00
## Prevalence 0.25
## Detection Rate 0.25
## Detection Prevalence 0.25
## Balanced Accuracy 1.00
# Multiclass Logistic -----------------------------------------------------
# The previous methods do not give "natively" any measure of variable
# importance. Lets try other "classics"...
suppressWarnings(require(viridis, quietly = T))
suppressWarnings(require(nnet, quietly = T)) # multiclass-logistic
?multinom
# Fit the model
mod3 = multinom(id ~ ., data = daily.tr, maxit = 500)
## # weights: 188 (138 variable)
## initial value 27032.740042
## iter 10 value 5824.919407
## iter 20 value 3634.465904
## iter 30 value 3045.613043
## iter 40 value 2590.899781
## iter 50 value 2198.433191
## iter 60 value 1650.849970
## iter 70 value 1197.284633
## iter 80 value 617.136851
## iter 90 value 224.817654
## iter 100 value 54.114870
## iter 110 value 7.056134
## iter 120 value 1.043503
## iter 130 value 0.236351
## iter 140 value 0.085437
## iter 150 value 0.024051
## iter 160 value 0.014635
## iter 170 value 0.005450
## iter 180 value 0.002594
## iter 190 value 0.001905
## iter 200 value 0.000307
## iter 210 value 0.000157
## iter 220 value 0.000100
## iter 220 value 0.000100
## iter 220 value 0.000070
## final value 0.000070
## converged
class(mod3)
## [1] "multinom" "nnet"
names(mod3)
## [1] "n" "nunits" "nconn" "conn"
## [5] "nsunits" "decay" "entropy" "softmax"
## [9] "censored" "value" "wts" "convergence"
## [13] "fitted.values" "residuals" "lev" "call"
## [17] "terms" "weights" "deviance" "rank"
## [21] "lab" "coefnames" "vcoefnames" "xlevels"
## [25] "edf" "AIC"
# Take a look at the coeff's and their standard errors
# Each row of values corresponds to a profile of that specific level of the
# response against the baseline level of id = "crosstr.
summary(mod3)
## Call:
## multinom(formula = id ~ ., data = daily.tr, maxit = 500)
##
## Coefficients:
## (Intercept) T.xAcc T.yAcc T.zAcc T.xGyro T.yGyro
## jumping -119.9712 0.09431812 2.596169 0.9003049 5.871740 8.551042
## stepper -334.6033 0.94096342 1.068385 3.3374218 1.406063 -6.471650
## walking -191.1198 0.91520474 -1.436747 2.2903229 -5.018481 10.063600
## T.zGyro T.xMag T.yMag T.zMag RA.xAcc RA.yAcc
## jumping -6.203039 265.9722 274.30636 277.0794 -0.8162727 1.354955
## stepper 4.614142 199.8532 172.69401 173.0864 -1.2170213 1.432096
## walking -5.536947 108.7806 63.18525 134.0281 -0.8021364 -1.229246
## RA.zAcc RA.xGyro RA.yGyro RA.zGyro RA.xMag RA.yMag
## jumping 1.9346590 -2.2608091 9.1695477 -3.4890735 23.428284 -40.974227
## stepper 0.9765970 0.1589911 0.8730202 -2.8143638 63.262345 -4.949428
## walking 0.9433189 -1.8875501 3.2566525 -0.6451893 -9.306769 68.497576
## RA.zMag LA.xAcc LA.yAcc LA.zAcc LA.xGyro LA.yGyro
## jumping -14.92698 1.8402941 0.1670749 2.0505896 -5.586349 -5.976765
## stepper -44.29088 1.4324164 0.6013660 1.8751510 -9.582874 1.666756
## walking 32.98697 -0.9940291 -0.7623836 0.7044377 -3.930608 -5.075649
## LA.zGyro LA.xMag LA.yMag LA.zMag RL.xAcc RL.yAcc
## jumping -2.37033834 12.1671673 -20.02364 18.89019 -0.9483074 0.03581965
## stepper -2.78915209 0.6748684 -33.59423 90.55630 -1.1571255 0.46241635
## walking 0.01703841 -95.8069393 72.16841 -32.63456 -0.7089812 -0.27252263
## RL.zAcc RL.xGyro RL.yGyro RL.zGyro RL.xMag RL.yMag
## jumping -0.4889773 -7.992962 -7.146648 3.014234 322.5412 -41.28186
## stepper -2.7582201 -11.049424 -21.793877 -7.357621 442.5157 -61.11306
## walking -0.6156858 -6.891270 -20.932980 -1.665089 220.0202 -62.49129
## RL.zMag LL.xAcc LL.yAcc LL.zAcc LL.xGyro LL.yGyro
## jumping 63.82113 0.4139987 -0.4894601 0.7059678 -1.1941200 2.541247
## stepper -42.52960 0.5638191 0.3200834 1.4854076 0.8468322 7.040384
## walking 104.20619 -0.6532616 -0.5306087 0.1616451 2.5619671 6.861792
## LL.zGyro LL.xMag LL.yMag LL.zMag
## jumping -1.177770 223.9820 107.17714 58.82988
## stepper 6.202517 347.9663 82.49983 65.86803
## walking 3.975023 195.0242 69.36066 47.97993
##
## Std. Errors:
## (Intercept) T.xAcc T.yAcc T.zAcc T.xGyro T.yGyro T.zGyro
## jumping 1656.5141 2600.457 1562.688 4005.482 3978.390 4861.310 1492.252
## stepper 603.8159 2860.045 1854.908 3381.876 2821.328 2493.370 1295.352
## walking 1352.2810 2984.089 3502.452 4661.744 1889.845 2027.137 1227.451
## T.xMag T.yMag T.zMag RA.xAcc RA.yAcc RA.zAcc RA.xGyro
## jumping 970.6951 839.4970 791.8451 2777.046 3433.319 2469.455 4334.161
## stepper 716.9538 679.9532 1245.8416 1922.071 2280.410 1469.470 1589.596
## walking 874.8435 782.9178 1462.5308 4420.329 3885.305 4625.852 3742.918
## RA.yGyro RA.zGyro RA.xMag RA.yMag RA.zMag LA.xAcc LA.yAcc
## jumping 3545.846 2522.311 591.7286 811.8736 1141.6400 1514.700 3283.178
## stepper 5176.006 3989.645 763.9948 897.0333 924.5238 3280.305 3665.341
## walking 3138.834 2384.486 698.1611 1149.4382 1382.4660 4403.968 4471.506
## LA.zAcc LA.xGyro LA.yGyro LA.zGyro LA.xMag LA.yMag LA.zMag
## jumping 2447.472 3467.671 2069.349 3364.105 668.6878 1310.157 1253.7886
## stepper 3337.972 3648.134 2635.278 2447.612 763.9130 1598.675 697.5111
## walking 4372.615 4536.192 2149.171 3485.364 1027.0284 1402.976 820.8999
## RL.xAcc RL.yAcc RL.zAcc RL.xGyro RL.yGyro RL.zGyro RL.xMag
## jumping 3119.990 1706.880 3954.498 5254.300 1353.893 1984.906 824.4463
## stepper 3950.530 2774.309 2903.605 3513.349 1034.199 3176.799 625.8474
## walking 3295.081 1957.657 3474.412 4872.635 1739.112 2522.482 893.0134
## RL.yMag RL.zMag LL.xAcc LL.yAcc LL.zAcc LL.xGyro LL.yGyro
## jumping 476.1609 1254.9092 4166.593 2512.477 3420.162 3912.207 776.001
## stepper 1310.7632 955.4004 3237.273 2413.297 4434.577 4362.800 1637.360
## walking 1268.4705 1016.0911 2977.161 2953.667 4487.123 4656.093 1683.356
## LL.zGyro LL.xMag LL.yMag LL.zMag
## jumping 2186.317 711.4678 630.7879 1088.5570
## stepper 3013.659 352.1035 1092.6664 746.7053
## walking 2068.405 780.7586 1423.8971 732.8488
##
## Residual Deviance: 0.0001390481
## AIC: 276.0001
t(coef(mod3)) # More readable
## jumping stepper walking
## (Intercept) -119.97123464 -334.6032874 -191.11983157
## T.xAcc 0.09431812 0.9409634 0.91520474
## T.yAcc 2.59616940 1.0683852 -1.43674703
## T.zAcc 0.90030486 3.3374218 2.29032289
## T.xGyro 5.87173962 1.4060633 -5.01848129
## T.yGyro 8.55104232 -6.4716504 10.06359959
## T.zGyro -6.20303897 4.6141416 -5.53694719
## T.xMag 265.97215207 199.8531503 108.78055104
## T.yMag 274.30636109 172.6940136 63.18524617
## T.zMag 277.07939323 173.0864187 134.02805229
## RA.xAcc -0.81627274 -1.2170213 -0.80213639
## RA.yAcc 1.35495531 1.4320958 -1.22924629
## RA.zAcc 1.93465899 0.9765970 0.94331889
## RA.xGyro -2.26080905 0.1589911 -1.88755010
## RA.yGyro 9.16954775 0.8730202 3.25665250
## RA.zGyro -3.48907353 -2.8143638 -0.64518930
## RA.xMag 23.42828376 63.2623453 -9.30676873
## RA.yMag -40.97422728 -4.9494284 68.49757605
## RA.zMag -14.92698092 -44.2908849 32.98697149
## LA.xAcc 1.84029408 1.4324164 -0.99402905
## LA.yAcc 0.16707486 0.6013660 -0.76238356
## LA.zAcc 2.05058965 1.8751510 0.70443771
## LA.xGyro -5.58634853 -9.5828745 -3.93060837
## LA.yGyro -5.97676519 1.6667561 -5.07564850
## LA.zGyro -2.37033834 -2.7891521 0.01703841
## LA.xMag 12.16716727 0.6748684 -95.80693933
## LA.yMag -20.02363776 -33.5942266 72.16841381
## LA.zMag 18.89018920 90.5562984 -32.63455988
## RL.xAcc -0.94830735 -1.1571255 -0.70898121
## RL.yAcc 0.03581965 0.4624164 -0.27252263
## RL.zAcc -0.48897733 -2.7582201 -0.61568580
## RL.xGyro -7.99296241 -11.0494238 -6.89126975
## RL.yGyro -7.14664837 -21.7938772 -20.93298042
## RL.zGyro 3.01423441 -7.3576212 -1.66508899
## RL.xMag 322.54122169 442.5157163 220.02022439
## RL.yMag -41.28185710 -61.1130625 -62.49128907
## RL.zMag 63.82112628 -42.5296043 104.20619487
## LL.xAcc 0.41399870 0.5638191 -0.65326156
## LL.yAcc -0.48946013 0.3200834 -0.53060875
## LL.zAcc 0.70596784 1.4854076 0.16164513
## LL.xGyro -1.19412004 0.8468322 2.56196715
## LL.yGyro 2.54124721 7.0403842 6.86179205
## LL.zGyro -1.17777023 6.2025173 3.97502310
## LL.xMag 223.98197539 347.9663421 195.02424127
## LL.yMag 107.17714394 82.4998310 69.36066225
## LL.zMag 58.82988401 65.8680280 47.97992635
# 2-tailed z test
z <- summary(mod3)$coefficients/summary(mod3)$standard.errors
head(z)
## (Intercept) T.xAcc T.yAcc T.zAcc T.xGyro
## jumping -0.07242391 3.626983e-05 0.0016613483 0.0002247681 0.0014759087
## stepper -0.55414786 3.290030e-04 0.0005759776 0.0009868551 0.0004983693
## walking -0.14133145 3.066948e-04 -0.0004102118 0.0004913018 -0.0026554988
## T.yGyro T.zGyro T.xMag T.yMag T.zMag
## jumping 0.001759000 -0.004156831 0.2740018 0.32675085 0.34991616
## stepper -0.002595543 0.003562075 0.2787532 0.25397928 0.13893132
## walking 0.004964440 -0.004510932 0.1243429 0.08070483 0.09164118
## RA.xAcc RA.yAcc RA.zAcc RA.xGyro
## jumping -0.0002939356 0.0003946488 0.0007834356 -0.0005216256
## stepper -0.0006331822 0.0006279993 0.0006645915 0.0001000198
## walking -0.0001814653 -0.0003163834 0.0002039233 -0.0005042991
## RA.yGyro RA.zGyro RA.xMag RA.yMag RA.zMag
## jumping 0.0025859973 -0.0013832847 0.03959296 -0.050468727 -0.01307503
## stepper 0.0001686668 -0.0007054171 0.08280468 -0.005517553 -0.04790670
## walking 0.0010375358 -0.0002705779 -0.01333040 0.059592224 0.02386096
## LA.xAcc LA.yAcc LA.zAcc LA.xGyro
## jumping 0.0012149559 5.088816e-05 0.0008378398 -0.0016109798
## stepper 0.0004366717 1.640682e-04 0.0005617635 -0.0026267882
## walking -0.0002257121 -1.704982e-04 0.0001611022 -0.0008664996
## LA.yGyro LA.zGyro LA.xMag LA.yMag LA.zMag
## jumping -0.0028882352 -7.045971e-04 0.0181955881 -0.01528339 0.01506649
## stepper 0.0006324782 -1.139540e-03 0.0008834362 -0.02101379 0.12982775
## walking -0.0023616777 4.888560e-06 -0.0932855789 0.05143950 -0.03975461
## RL.xAcc RL.yAcc RL.zAcc RL.xGyro
## jumping -0.0003039457 2.098545e-05 -0.0001236509 -0.001521223
## stepper -0.0002929038 1.666781e-04 -0.0009499294 -0.003144984
## walking -0.0002151635 -1.392086e-04 -0.0001772057 -0.001414280
## RL.yGyro RL.zGyro RL.xMag RL.yMag RL.zMag
## jumping -0.005278592 0.0015185777 0.3912216 -0.08669728 0.05085717
## stepper -0.021073194 -0.0023160490 0.7070665 -0.04662403 -0.04451495
## walking -0.012036595 -0.0006600994 0.2463795 -0.04926507 0.10255596
## LL.xAcc LL.yAcc LL.zAcc LL.xGyro LL.yGyro
## jumping 9.936143e-05 -0.0001948118 2.064136e-04 -0.0003052292 0.003274799
## stepper 1.741648e-04 0.0001326332 3.349604e-04 0.0001941029 0.004299838
## walking -2.194243e-04 -0.0001796441 3.602422e-05 0.0005502397 0.004076257
## LL.zGyro LL.xMag LL.yMag LL.zMag
## jumping -0.0005387006 0.3148168 0.16990995 0.05404392
## stepper 0.0020581353 0.9882503 0.07550322 0.08821154
## walking 0.0019217822 0.2497882 0.04871185 0.06547043
# Drop the intercept and plot the log(|z|)
lz = t(log(abs(z[,-1])))
# Plot
xcol = viridis(nlevels(daily.tr[,1]), alpha = c(1,.7,.5,.3))
plot(c(2,44), range(lz), type = "n",
xlab = "Variable Index", ylab = "log(|z|)", xlim = c(2,44),
sub = "(* = significat at 5%)")
xpl <- c(-1, 3*(1:14) + .5, 50)
for (i in 0:7){
rect(xpl[2*i + 1], -20, xpl[2*i + 2], 10,
col = rgb(0,0,0,.2), border = F)
text(3*(1:15) - 1, -7, gsub("z","",names(daily.tr[,-1])[3*(1:15)]),
cex = .7, pos = 1)
}
abline(h = 0, col = rgb(0,0,0,.3), lty = 1, lwd = 2)
matplot(lz, type = "h", col = xcol, lty = 1, lwd = c(1,3,5,6),
add = TRUE)
legend("bottom", rownames(z), col = xcol, lwd = c(1,3,5,6), lty = 1,
bty = "n", cex = .7, horiz = T)
# The test
p <- (1 - pnorm(abs(z), 0, 1))*2
t(p)
## jumping stepper walking
## (Intercept) 0.9422646 0.5794777 0.8876081
## T.xAcc 0.9999711 0.9997375 0.9997553
## T.yAcc 0.9986744 0.9995404 0.9996727
## T.zAcc 0.9998207 0.9992126 0.9996080
## T.xGyro 0.9988224 0.9996024 0.9978812
## T.yGyro 0.9985965 0.9979291 0.9960390
## T.zGyro 0.9966833 0.9971579 0.9964008
## T.xMag 0.7840833 0.7804342 0.9010438
## T.yMag 0.7438563 0.7995116 0.9356767
## T.zMag 0.7264016 0.8895044 0.9269831
## RA.xAcc 0.9997655 0.9994948 0.9998552
## RA.yAcc 0.9996851 0.9994989 0.9997476
## RA.zAcc 0.9993749 0.9994697 0.9998373
## RA.xGyro 0.9995838 0.9999202 0.9995976
## RA.yGyro 0.9979367 0.9998654 0.9991722
## RA.zGyro 0.9988963 0.9994372 0.9997841
## RA.xMag 0.9684176 0.9340068 0.9893642
## RA.yMag 0.9597489 0.9955977 0.9524804
## RA.zMag 0.9895679 0.9617906 0.9809635
## LA.xAcc 0.9990306 0.9996516 0.9998199
## LA.yAcc 0.9999594 0.9998691 0.9998640
## LA.zAcc 0.9993315 0.9995518 0.9998715
## LA.xGyro 0.9987146 0.9979041 0.9993086
## LA.yGyro 0.9976955 0.9994954 0.9981157
## LA.zGyro 0.9994378 0.9990908 0.9999961
## LA.xMag 0.9854828 0.9992951 0.9256767
## LA.yMag 0.9878061 0.9832347 0.9589753
## LA.zMag 0.9879791 0.8967027 0.9682888
## RL.xAcc 0.9997575 0.9997663 0.9998283
## RL.yAcc 0.9999833 0.9998670 0.9998889
## RL.zAcc 0.9999013 0.9992421 0.9998586
## RL.xGyro 0.9987862 0.9974907 0.9988716
## RL.yGyro 0.9957883 0.9831873 0.9903964
## RL.zGyro 0.9987884 0.9981521 0.9994733
## RL.xMag 0.6956334 0.4795252 0.8053885
## RL.yMag 0.9309121 0.9628129 0.9607081
## RL.zMag 0.9594393 0.9644939 0.9183154
## LL.xAcc 0.9999207 0.9998610 0.9998249
## LL.yAcc 0.9998446 0.9998942 0.9998567
## LL.zAcc 0.9998353 0.9997327 0.9999713
## LL.xGyro 0.9997565 0.9998451 0.9995610
## LL.yGyro 0.9973871 0.9965692 0.9967476
## LL.zGyro 0.9995702 0.9983578 0.9984666
## LL.xMag 0.7529008 0.3230301 0.8027512
## LL.yMag 0.8650810 0.9398143 0.9611489
## LL.zMag 0.9569002 0.9297085 0.9477994
t.res = t(p < 0.05)
t.res
## jumping stepper walking
## (Intercept) FALSE FALSE FALSE
## T.xAcc FALSE FALSE FALSE
## T.yAcc FALSE FALSE FALSE
## T.zAcc FALSE FALSE FALSE
## T.xGyro FALSE FALSE FALSE
## T.yGyro FALSE FALSE FALSE
## T.zGyro FALSE FALSE FALSE
## T.xMag FALSE FALSE FALSE
## T.yMag FALSE FALSE FALSE
## T.zMag FALSE FALSE FALSE
## RA.xAcc FALSE FALSE FALSE
## RA.yAcc FALSE FALSE FALSE
## RA.zAcc FALSE FALSE FALSE
## RA.xGyro FALSE FALSE FALSE
## RA.yGyro FALSE FALSE FALSE
## RA.zGyro FALSE FALSE FALSE
## RA.xMag FALSE FALSE FALSE
## RA.yMag FALSE FALSE FALSE
## RA.zMag FALSE FALSE FALSE
## LA.xAcc FALSE FALSE FALSE
## LA.yAcc FALSE FALSE FALSE
## LA.zAcc FALSE FALSE FALSE
## LA.xGyro FALSE FALSE FALSE
## LA.yGyro FALSE FALSE FALSE
## LA.zGyro FALSE FALSE FALSE
## LA.xMag FALSE FALSE FALSE
## LA.yMag FALSE FALSE FALSE
## LA.zMag FALSE FALSE FALSE
## RL.xAcc FALSE FALSE FALSE
## RL.yAcc FALSE FALSE FALSE
## RL.zAcc FALSE FALSE FALSE
## RL.xGyro FALSE FALSE FALSE
## RL.yGyro FALSE FALSE FALSE
## RL.zGyro FALSE FALSE FALSE
## RL.xMag FALSE FALSE FALSE
## RL.yMag FALSE FALSE FALSE
## RL.zMag FALSE FALSE FALSE
## LL.xAcc FALSE FALSE FALSE
## LL.yAcc FALSE FALSE FALSE
## LL.zAcc FALSE FALSE FALSE
## LL.xGyro FALSE FALSE FALSE
## LL.yGyro FALSE FALSE FALSE
## LL.zGyro FALSE FALSE FALSE
## LL.xMag FALSE FALSE FALSE
## LL.yMag FALSE FALSE FALSE
## LL.zMag FALSE FALSE FALSE
for (i in 1:ncol(t.res)){
pass = which(t.res[-1,i])
points(pass, lz[pass,i], pch = "*", col = xcol[i], cex = 2)
}

# The ratio of the probability of choosing one outcome category over the
# probability of choosing the baseline category (here "crosstr") is often
# referred as **relative risk** and it is also sometimes referred as
# **odds** . The **relative risk** is the linear model exponentiated,
# leading to the fact that the exponentiated regression coefficients
# are relative risk ratios for a unit change in the predictor variable.
# So extract the coefficients from the model and exponentiate
t(exp(coef(mod3))) # risk ratios
## jumping stepper walking
## (Intercept) 7.891414e-53 4.826571e-146 9.947450e-84
## T.xAcc 1.098909e+00 2.562449e+00 2.497286e+00
## T.yAcc 1.341226e+01 2.910676e+00 2.376997e-01
## T.zAcc 2.460353e+00 2.814647e+01 9.878127e+00
## T.xGyro 3.548658e+02 4.079863e+00 6.614565e-03
## T.yGyro 5.172143e+03 1.546671e-03 2.347285e+04
## T.zGyro 2.023273e-03 1.009012e+02 3.938532e-03
## T.xMag 3.237710e+115 6.239077e+86 1.749013e+47
## T.yMag 1.348152e+119 1.000132e+75 2.760602e+27
## T.zMag 2.158000e+120 1.480732e+75 1.613034e+58
## RA.xAcc 4.420763e-01 2.961109e-01 4.483700e-01
## RA.yAcc 3.876588e+00 4.187466e+00 2.925130e-01
## RA.zAcc 6.921683e+00 2.655405e+00 2.568492e+00
## RA.xGyro 1.042661e-01 1.172327e+00 1.514424e-01
## RA.yGyro 9.600282e+03 2.394131e+00 2.596248e+01
## RA.zGyro 3.052914e-02 5.994284e-02 5.245632e-01
## RA.xMag 1.495458e+10 2.981862e+27 9.080750e-05
## RA.yMag 1.603685e-18 7.087459e-03 5.599114e+29
## RA.zMag 3.290747e-07 5.817188e-20 2.118652e+14
## LA.xAcc 6.298390e+00 4.188809e+00 3.700826e-01
## LA.yAcc 1.181843e+00 1.824610e+00 4.665530e-01
## LA.zAcc 7.772483e+00 6.521804e+00 2.022709e+00
## LA.xGyro 3.748691e-03 6.889861e-05 1.963173e-02
## LA.yGyro 2.537020e-03 5.294963e+00 6.247034e-03
## LA.zGyro 9.344910e-02 6.147332e-02 1.017184e+00
## LA.xMag 1.923683e+05 1.963775e+00 2.463627e-42
## LA.yMag 2.013004e-09 2.571655e-15 2.199601e+31
## LA.zMag 1.599208e+08 2.128632e+39 6.714145e-15
## RL.xAcc 3.873962e-01 3.143886e-01 4.921453e-01
## RL.yAcc 1.036469e+00 1.587906e+00 7.614562e-01
## RL.zAcc 6.132532e-01 6.340452e-02 5.402702e-01
## RL.xGyro 3.378318e-04 1.589631e-05 1.016622e-03
## RL.yGyro 7.874991e-04 3.427989e-10 8.108156e-10
## RL.zGyro 2.037349e+01 6.377137e-04 1.891738e-01
## RL.xMag 1.196390e+140 1.521016e+192 3.577415e+95
## RL.yMag 1.179009e-18 2.876962e-27 7.250667e-28
## RL.zMag 5.213903e+27 3.385537e-19 1.803746e+45
## LL.xAcc 1.512855e+00 1.757371e+00 5.203459e-01
## LL.yAcc 6.129572e-01 1.377243e+00 5.882468e-01
## LL.zAcc 2.025806e+00 4.416765e+00 1.175443e+00
## LL.xGyro 3.029704e-01 2.332247e+00 1.296129e+01
## LL.yGyro 1.269550e+01 1.141826e+03 9.550771e+02
## LL.zGyro 3.079647e-01 4.939910e+02 5.325135e+01
## LL.xMag 1.879905e+97 1.317839e+151 4.988291e+84
## LL.yMag 3.519186e+46 6.748719e+35 1.327250e+30
## LL.zMag 3.544002e+25 4.037580e+28 6.877289e+20
# So, the relative risk ratio for a one-unit increase in the variable
# "T-yGyro" is 0.277 for being in "jumping" vs. "crosstr".
# We can calculate predicted probabilities for each of our outcome levels
# using the fitted function.
head(pp <- fitted(mod3))
## crosstr jumping stepper walking
## 1 3.241515e-56 3.429236e-10 2.244078e-09 1
## 2 8.008832e-53 3.717713e-15 4.093399e-16 1
## 3 4.063881e-51 2.222220e-22 1.552236e-21 1
## 4 3.160934e-49 7.470229e-26 1.178402e-25 1
## 6 6.217021e-48 9.043649e-33 1.288195e-30 1
## 7 1.683406e-46 2.451412e-35 3.751841e-32 1
# Predict on test
pred3 = predict(mod3, daily.te[,-1])
head(pred3)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# Missclassification error on test
mean(pred3 != daily.te$id)*100
## [1] 0.00952381
# LASSO Logistic Regression -----------------------------------------------
suppressMessages(require(glmnet, quietly = T)) # l1 penalized-glm
?glmnet
suppressMessages(require(doMC, quietly = T))
registerDoMC( cores = detectCores() )
# Deviance
mod4 = cv.glmnet(as.matrix(daily.tr[,-1]), daily.tr[,1], family = "multinomial",
type.measure = "deviance",
parallel = T)
# Missclassification
mod5 = cv.glmnet(as.matrix(daily.tr[,-1]), daily.tr[,1], family = "multinomial",
type.measure = "class",
parallel = T)
# Let's take a look at the optimal lambda values (pretty close to each other)
log(c("min" = mod4$lambda.min, "1se" = mod4$lambda.1se))
## min 1se
## -8.080373 -8.080373
log(c("min" = mod5$lambda.min, "1se" = mod5$lambda.1se))
## min 1se
## -5.103293 -5.103293
# Now plot (optimal around 5-7 variables!)
par(mfrow = c(1,2))
plot(mod4)
plot(mod5)

par(mfrow = c(1,1))
# Take a look at the optimal non-zero coeff's under the two
# (cross-validated) losses.
cc4 <- lapply(coef(mod4), function(x) as.vector(x))
cc5 <- lapply(coef(mod5), function(x) as.vector(x))
cc4.mat <- matrix(unlist(cc4), ncol = length(cc4), byrow = T)
cc5.mat <- matrix(unlist(cc5), ncol = length(cc5), byrow = T)
cc4.mat <- cc4.mat[-1,]
cc5.mat <- cc5.mat[-1,]
# Plot the coeff's matrix & Get the list of active coeff's (per class)
par(mfrow = c(2,2))
act.coeff4 <- list()
act.coeff5 <- list()
for (ix in 1:ncol(cc4.mat)){
# Get the support
jnk4 <- cc4.mat[,ix]; jnk4n <- jnk4
jnk5 <- cc5.mat[,ix]; jnk5n <- jnk5
idx.act4 = which( abs(jnk4) > 0 )
idx.act5 = which( abs(jnk5) > 0 )
act.coeff4[[ix]] <- idx.act4
act.coeff5[[ix]] <- idx.act5
# Inactivated names
cf.name4 <- names(daily.tr[,-1]); cf.name4[-idx.act4] <- NA; jnk4n[-idx.act4] <- NA
cf.name5 <- names(daily.tr[,-1]); cf.name5[-idx.act5] <- NA; jnk5n[-idx.act4] <- NA
# Plot
par(mar = c(2,2,1,1))
# 1 #
plot(jnk4n, type = "h", ylim = 1.05*range(cc4.mat),
main = "", xaxt = "n", xlab = "", ylab = "")
pos.text <- (1*(jnk4 < 0)) + (3*(jnk4 >= 0))
text(jnk4, cf.name4, cex = .6, pos = pos.text)
# 2 #
lines(jnk5n, type = "h", col = rgb(1,0,0,.35), lwd = 4)
pos.text <- (1*(jnk5 < 0)) + (3*(jnk5 >= 0))
text(jnk5, cf.name5, cex = .6, pos = pos.text, col = rgb(1,0,0,.6))
# Annotation
legend("topleft", c("Deviance", "Miss-classification"),
col = c("black", rgb(1,0,0,.35)), lwd = c(1,4),
cex = .8, bty = "n")
legend("bottom", paste("Class = ", ix, sep = ""), cex = .8)
abline(h = 0)
}

par(mfrow = c(1,1))
# Active coeff's (per class)
act.coeff4
## [[1]]
## [1] 2 18 23 27 32 34 35 37 41
##
## [[2]]
## [1] 4 18 34 36
##
## [[3]]
## [1] 2 8 11 13 23 31 34 37 38
##
## [[4]]
## [1] 1 4 10 33 40
act.coeff5
## [[1]]
## [1] 2 23 27 32 35 37
##
## [[2]]
## [1] 18
##
## [[3]]
## [1] 2 8 11 13 23 31 34 38
##
## [[4]]
## [1] 1 4 10 33 40
# Prediction --------------------------------------------------------------
?predict.glmnet
?predict.cv.glmnet
# More statistics
# http://topepo.github.io/caret/index.html
suppressMessages(require(caret, quietly = T))
library(help = caret)
?confusionMatrix
## Help on topic 'confusionMatrix' was found in the following
## packages:
##
## Package Library
## ModelMetrics /Library/Frameworks/R.framework/Versions/3.5/Resources/library
## caret /Library/Frameworks/R.framework/Versions/3.5/Resources/library
##
##
## Using the first match ...
# OK, now for real using the optimal lambdas
previ4a = predict(mod4, newx = as.matrix(daily.te[,-1]), type = "class", s = mod4$lambda.1se)
previ4b = predict(mod4, newx = as.matrix(daily.te[,-1]), type = "class", s = mod4$lambda.min)
c("Dev-1se" = mean(previ4a != daily.te[,1])*100, "Dev-min" = mean(previ4b != daily.te[,1])*100)
## Dev-1se Dev-min
## 0 0
previ5a = predict(mod5, newx = as.matrix(daily.te[,-1]), type = "class", s = mod5$lambda.1se)
previ5b = predict(mod5, newx = as.matrix(daily.te[,-1]), type = "class", s = mod5$lambda.min)
c("MC-1se" = mean(previ5a != daily.te[,1])*100, "MC-min" = mean(previ5b != daily.te[,1])*100)
## MC-1se MC-min
## 0 0
# It seems it doesn't make a big difference...and we're using less than 10 vars!
# knn ---------------------------------------------------------------------
library(help = class)
?class::knn
?class::knn1
?class::knn.cv
start <- Sys.time()
mod6a <- class::knn(daily.tr[,-1], daily.te[,-1], daily.tr[,1],
k = 3, prob = T)
tempo_a <- Sys.time() - start
tempo_a
## Time difference of 17.55356 secs
# Take a look
names(attributes(mod6a))
## [1] "levels" "class" "prob"
head(attributes(mod6a)$prob) # strongly "opinionated"!
## [1] 1 1 1 1 1 1
head(mod6a)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
table(mod6a)
## mod6a
## crosstr jumping stepper walking
## 2650 2624 2599 2627
table(daily.te$id)
##
## crosstr jumping stepper walking
## 2625 2625 2625 2625
# Confusion matrix on test
table(mod6a, daily.te$id) # not bad, but we could try to tweak "k"
##
## mod6a crosstr jumping stepper walking
## crosstr 2620 2 28 0
## jumping 0 2623 1 0
## stepper 5 0 2594 0
## walking 0 0 2 2625
# Faster alternative
suppressMessages(require(FNN, quietly = T))
library(help = FNN)
?FNN::knn
start <- Sys.time()
mod6b <- FNN::knn(daily.tr[,-1], daily.te[,-1], daily.tr[,1],
k = 3, prob = T)
tempo_b <- Sys.time() - start
tempo_b # way faster!
## Time difference of 4.376986 secs
names(attributes(mod6b))
## [1] "levels" "class" "prob" "nn.index" "nn.dist"
head( attributes(mod6b)$nn.dist )
## [,1] [,2] [,3]
## [1,] 2.000269 2.008701 2.461501
## [2,] 2.235429 2.268110 2.395444
## [3,] 4.044905 4.382678 4.585129
## [4,] 3.581235 4.220713 4.220957
## [5,] 3.391058 3.767062 3.803545
## [6,] 1.812719 2.039704 2.042574
table(mod6b)
## mod6b
## crosstr jumping stepper walking
## 2650 2624 2599 2627
# Confusion matrix on test
table(mod6b, daily.te$id) # ...exactly the same as before
##
## mod6b crosstr jumping stepper walking
## crosstr 2620 2 28 0
## jumping 0 2623 1 0
## stepper 5 0 2594 0
## walking 0 0 2 2625
# Let's do it via <caret>...
# Caret package provides train() method for training our data
# for various algorithms. We just need to pass different parameter
# values for different algorithms.
# Before train() method, we will first use trainControl() method.
# It controls the computational nuances of the train() method.
# In addition, since the dataset is not that small (10k obs),
# let's go parallel to speed up the cross-validation
# To spice things up a bit, let's use only one type of sensors
suppressWarnings(require(doMC, quietly = T)) # parallel backend
registerDoMC(4) # register 4 cores
?trainControl
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
set.seed(1234) # For reproducibility
knn_fit <- train(id ~ T.xGyro + T.yGyro + T.zGyro,
data = daily.tr, method = "knn",
trControl = trctrl,
preProcess = c("center", "scale"),
tuneLength = 15 # just provide the number, not the values,
# of the tuning parameters to try (here "k",
# the number of NN)
)
# Take a look
knn_fit
## k-Nearest Neighbors
##
## 19500 samples
## 3 predictor
## 4 classes: 'crosstr', 'jumping', 'stepper', 'walking'
##
## Pre-processing: centered (3), scaled (3)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 17551, 17551, 17550, 17548, 17550, 17549, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.5704440 0.4272598
## 7 0.5820517 0.4427374
## 9 0.5876239 0.4501668
## 11 0.5934705 0.4579621
## 13 0.5981196 0.4641610
## 15 0.6001540 0.4668729
## 17 0.6031969 0.4709304
## 19 0.6050771 0.4734373
## 21 0.6054697 0.4739608
## 23 0.6072137 0.4762860
## 25 0.6078462 0.4771297
## 27 0.6092819 0.4790442
## 29 0.6094524 0.4792716
## 31 0.6100165 0.4800236
## 33 0.6099997 0.4800011
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 31.
plot(knn_fit) # not that good now...probably we picked the wrong sensor!

# Prediction is more standard using <caret>
test_pred <- predict(knn_fit, newdata = daily.te)
head(test_pred)
## [1] walking walking walking walking walking walking
## Levels: crosstr jumping stepper walking
# How Accurately our model is working?
confusionMatrix(test_pred, daily.te$id) #...not that accurately...
## Confusion Matrix and Statistics
##
## Reference
## Prediction crosstr jumping stepper walking
## crosstr 1923 153 334 56
## jumping 197 1373 474 178
## stepper 295 474 946 288
## walking 210 625 871 2103
##
## Overall Statistics
##
## Accuracy : 0.6043
## 95% CI : (0.5949, 0.6137)
## No Information Rate : 0.25
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4724
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: crosstr Class: jumping Class: stepper
## Sensitivity 0.7326 0.5230 0.3604
## Specificity 0.9310 0.8922 0.8658
## Pos Pred Value 0.7798 0.6179 0.4723
## Neg Pred Value 0.9126 0.8488 0.8024
## Prevalence 0.2500 0.2500 0.2500
## Detection Rate 0.1831 0.1308 0.0901
## Detection Prevalence 0.2349 0.2116 0.1908
## Balanced Accuracy 0.8318 0.7076 0.6131
## Class: walking
## Sensitivity 0.8011
## Specificity 0.7834
## Pos Pred Value 0.5521
## Neg Pred Value 0.9220
## Prevalence 0.2500
## Detection Rate 0.2003
## Detection Prevalence 0.3628
## Balanced Accuracy 0.7923
# ...since we are using only 3 covariates, this failure is easy to visualize...
suppressMessages(require(plotly, quietly = T))
p <- plot_ly(daily.te, x = ~T.xGyro, y = ~T.yGyro, z = ~T.zGyro,
size = .1,
color = ~test_pred,
colors = RColorBrewer::brewer.pal(nlevels(test_pred), "Set1") ) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'T-xGyro'),
yaxis = list(title = 'T-yGyro'),
zaxis = list(title = 'T-zGyro')))
p